### Rachel Porter
### 12/1/2022
### Replication file for Black/White Analysis

rm(list=ls())

# load libraries
library("cobalt")
library("WeightIt")
library("ebal")
library("jtools")
library("survey")
library("sensemakr")
library("stargazer")
library("MASS")
library("arm")
library("ggplot2")

# clear environment and set working directory
setwd("~/Dropbox/Primary_Elections/Issue Paper/Replication_Archive/JOP_Replication_Files")

# load data for Black/White democrat analysis
original <- readRDS("white_democrats_data.rds")

# subsetting data into strategic/no groups for balancing
strategic <- subset(original, original$strategic == "Strategic")
not <- subset(original, original$strategic == "Not")

############################## WHITE PROFESSIONAL DEMOCRATS#################################

# main findings; Model 1 with weights, Table A8
w.out.strat <- weightit(black_number ~ quality_cand + openrace + dem_pres_av + 
                        primary_type + gf + gender + black_alone + south, 
                        data = strategic, estimand = "ATT", method = "ebal")
d.w.strat <- svydesign(ids = ~1, weights = w.out.strat$weights,
                        data = strategic)
fit.strat <- svyglm(narrow_race ~ black_number + quality_cand + openrace + 
                        dem_pres_av + primary_type + gf + gender + 
                        black_alone + south, design = d.w.strat, 
                        family=quasibinomial())

# appendix Model 1 without weights, Table A8
fit2 <- glm(narrow_race ~ black_number + quality_cand + openrace + dem_pres_av + 
                        primary_type + gf + gender + black_alone + south, 
                        data = strategic, family=binomial())

# appendix Model 2 with weights, Table A8
w.out.strat3 <- weightit(black_number ~ as.factor(qual_chall) + openrace + dem_pres_av + 
                         primary_type + gf + gender + south, 
                         data = strategic, estimand = "ATT", method = "ebal")
d.w.strat3 <- svydesign(ids = ~1, weights = w.out.strat3$weights,
                         data = strategic)
fit.strat5 <- svyglm(narrow_race ~ black_number + as.factor(qual_chall) + 
                         openrace + dem_pres_av + primary_type + gf + gender + 
                         south, design = d.w.strat3, family=quasibinomial())

# appendix Model 2 without weights, Table A8
fit6 <- glm(narrow_race ~ black_number + as.factor(qual_chall) + openrace + 
                         dem_pres_av + primary_type + gf + gender + black_alone + 
                         south, data = strategic, family=binomial())

# appendix Model 3, Table A8
w.out.strat2 <- weightit(black_number ~ quality_cand + openrace + ss_party + 
                         primary_type + gf + gender + black_dicho + south, 
                         data = strategic, estimand = "ATT", method = "ebal")
d.w.strat2 <- svydesign(ids = ~1, weights = w.out.strat$weights,
                         data = strategic)
fit.strat3 <- svyglm(narrow_race ~ black_number + quality_cand + openrace + 
                         ss_party + primary_type + gf + gender + black_dicho + 
                         south, design = d.w.strat2, family=quasibinomial())

# appendix Model 4, Table A8
fit4 <- glm(narrow_race ~ black_number*black_alone + quality_cand + openrace + 
                         dem_pres_av + primary_type + gf + gender + south, 
                         data = strategic, family=binomial())

#################### 
#### APPENDIX TABLE A8
#################### 
stargazer(fit.strat, fit2, fit.strat5,fit6, fit.strat3, fit4,
                         star.char = c("*", "*"),
                         star.cutoffs = c(0.05, 0.01))

# Simulating predicted probabilities for Figure 2
set.seed(12484)
m <- 1000

# Simulate coefficients from a multivariate normal
betas <- fit.strat$coef
vcv <- vcov(fit.strat)
sim.betas <- mvrnorm(m, betas, vcv)

# Check to see if the simulated coefficients look like the real results
round(fit.strat$coef, digits = 2)
round(head(sim.betas, 10), digits = 2)
data.frame(sim.means = apply(sim.betas, 2, mean), 
           betas = betas, sim.sd = apply(sim.betas, 2, sd), se = sqrt(diag(vcv)))   

# Set values for all other variables 
black_number <- c(0,1)
south1 <- c(0,0)
openrace1 <- c(0,0)
`primary_typeOpen Primary` <- c(0,0)
quality_cand2 <- c(1,1)
dem_pres_av <- c(55,55)
black_alone <- c(8,8)
gf1 <- c(0,0)
gender <- c(0,0)

# Create hypothetical independent variable profile
x.nes <- data.frame(intercept = 1, black_number, quality_cand2, openrace1, dem_pres_av,
                    `primary_typeOpen Primary`, gf1, gender, black_alone, south1)

# Compute the predicted probabilities using the estimated coefficients
pp.betas <- invlogit(as.matrix(x.nes)%*%betas)

# Compute pp and confidence intervals using the simulated coefficients
pp.sim <- matrix(NA, nrow = m, ncol = length(black_number))

for(i in 1:m){
  pp.sim[i, ] <- invlogit(as.matrix(x.nes)%*%sim.betas[i, ]) 
}

# compute first differences for Figure 2
pe_strat <- pp.sim[,2] - pp.sim[,1]
lo <- quantile(pe_strat, prob = .025)
hi <- quantile(pe_strat, prob = .975)

race_vector <- c(mean(pe_strat), lo, hi)

#################### 
## Love plots, APPENDIX FIGURE 6
#################### 
w.out.strat <- weightit(black_number ~ quality_cand + openrace + dem_pres_av + 
                          primary_type + gf + gender + black_alone + south, 
                        data = strategic, estimand = "ATT", method = "ebal")
w.out.strat2 <- weightit(black_number ~ quality_cand + openrace + dem_pres_av + 
                           primary_type + gf + gender + black_alone + south, 
                         data = strategic, estimand = "ATT", method = "cbps")

new.names.strat = c(quality_cand = "Experienced Candidate", 
                    openrace = "Open Seat", 
                    dem_pres_av = "District Presidential Vote", 
                    `primary_type_Open Primary` = "Open Primary", 
                    gf = "Pre/Post Protests", gender = "Female Candidate", 
                    black_alone = "% District Black Pop.",
                    ss_party_Competitive = "Two-Party Competitive", 
                    south = "Southern State")

love.plot(black_number ~ quality_cand + openrace + dem_pres_av + 
                    primary_type + gf + gender + black_alone + south,
                    data = strategic, estimand = "ATT",
                    stats = c("mean.diffs"),
                    weights = list(w1 = get.w(w.out.strat),
                                   w2 = get.w(w.out.strat2)),
                    var.order = "unadjusted",
                    abs = TRUE,
                    line = FALSE, 
                    title = NULL,
                    size =  5,
                    threshold = c(mean.diffs = .05,
                                  ks = .05),
                    var.names = new.names.strat,
                    colors = c("black", "black", "black"),
                    shapes = c("triangle filled", "circle filled", "square filled"),
                    sample.names = c("Unweighted", "Entropy Balanced", "CBPS Weighted"),
                    limits = list(mean.diffs = c(0, .55),
                                  ks = c(0, .55)),
                    wrap = 20, grid = FALSE,
                    position = "top",
                    themes = list(theme(text = element_text(size=20)),
                                  theme(text = element_text(size=20))))

#################### 
## Sensitivity Analysis, APPENDIX TABLE A13
#################### 

model <- lm(narrow_race ~ black_number + quality_cand + openrace + dem_pres_av +
                    primary_type + gf + gender + black_alone + south, 
                    data = strategic, weights = w.out.strat$weights)

question = sensemakr(model, treatment = "black_number",
                     benchmark_covariates = "quality_cand1",
                     kd = c(2,4,8))

summary(question)
ovb_minimal_reporting(question)

#################### 
## Contour Plots, APPENDIX FIGURE 10
#################### 

plot(question, cex.label.text = 1.1, label.bump.x = .08, label.bump.y = .01, 
                    cex.lab = 1.1)
plot(question, sensitivity.of = "t-value", cex.label.text = 1.1, 
                    label.bump.x = .08, label.bump.y = .01, cex.lab = 1.1)

rm(list=setdiff(ls(), c("not", "race_vector", "pe_strat")))

############################### WHITE AMATEUR DEMOCRATS#################################

# main findings; Model 1 with weights, Table A9
w.out.not <- weightit(black_number ~ openrace + dem_pres_av + primary_type + 
                      gf + gender + black_alone + south, 
                      data = not, estimand = "ATT", method = "ebal")
d.w.am <- svydesign(ids = ~1, weights = w.out.not$weights,
                      data = not)
fit.am <- svyglm(narrow_race ~ black_number + openrace + dem_pres_av + 
                      primary_type + gf + gender + black_alone + south, 
                      design = d.w.am, family=quasibinomial())

# appendix Model 1 without weights, Table A9
fit2 <- glm(narrow_race ~ black_number + openrace + dem_pres_av + primary_type + 
                      gf + gender + black_alone +
                      south, data = not, family=binomial())

# appendix Model 2, Table A9
w.out.not2 <- weightit(black_number ~ openrace + ss_party + primary_type + gf + 
                      gender + black_dicho + south, 
                      data = not, estimand = "ATT", method = "ebal")
d.w.am2 <- svydesign(ids = ~1, weights = w.out.not2$weights,
                      data = not)
fit.am3 <- svyglm(narrow_race ~ black_number + openrace + ss_party + primary_type + 
                      gf + gender + black_dicho +
                      south, design = d.w.am2, family=quasibinomial())

#################### 
#### APPENDIX TABLE A7
#################### 
stargazer(fit.am, fit2, fit.am3,
                      star.char = c("*", "*"),
                      star.cutoffs = c(0.05, 0.01))

# Simulating predicted probabilities for Figure 2
set.seed(12484)
m <- 1000

# Simulate coefficients from a multivariate normal
betas <- fit.am$coef
vcv <- vcov(fit.am)
sim.betas <- mvrnorm(m, betas, vcv)

# Check to see if the simulated coefficients look like the real results
round(fit.am$coef, digits = 2)
round(head(sim.betas, 10), digits = 2)
data.frame(sim.means = apply(sim.betas, 2, mean), 
                      betas = betas, sim.sd = apply(sim.betas, 2, sd), 
                      se = sqrt(diag(vcv)))   

# Set values for all other variables 
black_number <- c(0,1)
openrace1 <- c(0,0)
dem_pres_av  <- c(55,55)
`primary_typeOpen Primary` <- c(1,1)
gf1 <- c(0,0)
gender <- c(0,0)
black_alone <- c(8,8)
south1 <- c(0,0)

# Create hypothetical independent variable profile
x.nes <- data.frame(intercept = 1, black_number, openrace1, dem_pres_av, 
                    `primary_typeOpen Primary`, gf1, gender, black_alone, south1)

# Compute the predicted probabilities using the estimated coefficients
pp.betas <- invlogit(as.matrix(x.nes)%*%betas)

# Compute pp and confidence intervals using the simulated coefficients
pp.sim <- matrix(NA, nrow = m, ncol = length(black_number))

for(i in 1:m){
  pp.sim[i, ] <- invlogit(as.matrix(x.nes)%*%sim.betas[i, ]) 
}

pe_not <- pp.sim[,2] - pp.sim[,1]
lo <- quantile(pe_not, prob = .025)
hi <- quantile(pe_not, prob = .975)

race_vector <- rbind.data.frame(race_vector, c(mean(pe_not), lo, hi))
colnames(race_vector) <- c("pe", "lo", "hi")

# running ttest discussed in manuscript
t.test(pe_strat, pe_not)

# saving simulations to create Figure 2
setwd("~/Dropbox/Primary_Elections/Issue Paper/Replication_Archive/JOP_Replication_Files")
saveRDS(race_vector, file = "figure_2_race.rds")

#################### 
## Love plots, APPENDIX FIGURE 7
#################### 
w.out.not <- weightit(black_number ~ openrace + dem_pres_av + primary_type + gf + 
                      gender + black_alone + south, 
                      data = not, estimand = "ATT", method = "ebal")
w.out.not2 <- weightit(black_number ~ openrace + dem_pres_av + primary_type + gf + 
                      gender + black_alone + south, 
                      data = not, estimand = "ATT", method = "cbps")

new.names.not = c(openrace = "Open Seat", dem_pres_av = "District Presidential Vote", 
                  `primary_type_Open Primary` = "Open Primary", 
                  gf = "Pre/Post Protests", gender = "Female Candidate", 
                  black_alone = "% District Black Pop.",
                  ss_party_Competitive = "Two-Party Competitive", 
                  south = "Southern State")

love.plot(black_number ~ openrace + dem_pres_av + primary_type + gf + gender + 
                  black_alone + south, 
                  data = not, estimand = "ATT",
                  stats = c("mean.diffs"),
                  weights = list(w1 = get.w(w.out.not),
                                 w2 = get.w(w.out.not2)),
                  var.order = "unadjusted",
                  abs = TRUE,
                  line = FALSE, 
                  title = NULL,
                  size =  5,
                  threshold = c(mean.diffs = .05,
                                ks = .05),
                  var.names = new.names.not,
                  colors = c("black", "black", "black"),
                  shapes = c("triangle filled", "circle filled", "square filled"),
                  sample.names = c("Unweighted", "Entropy Balanced", "CBPS Weighted"),
                  limits = list(mean.diffs = c(0, .55),
                                ks = c(0, .55)),
                  wrap = 20, grid = FALSE,
                  position = "top",
                  themes = list(theme(text = element_text(size=20)),
                                theme(text = element_text(size=20))))
